# Import libraries
import numpy as np
import pandas as pd; import geopandas as gpd
import libpysal as ps
# visualization, basemap, interactive mapping
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
from shapely.geometry import Point
import contextily as cx
import fiona
%matplotlib inline
import pydeck as pdk
import folium
import osmapi as osm
import osmnx as ox
from colorcet import fire
# For API
import requests
import json
import shapely
import pprint as pp
# pip install sodapy for downloading CalEnvScreen 3.0 data
from sodapy import Socrata
#For regression and GWR:
import statsmodels.api as sm
import numpy as np
from patsy import dmatrices
import esda as esda
import splot as splot
from splot.libpysal import plot_spatial_weights
from splot.esda import moran_scatterplot, plot_moran
from mgwr.gwr import GWR, MGWR
from mgwr.sel_bw import Sel_BW
from mgwr.utils import compare_surfaces, truncate_colormap
# ------- Data description -------
# State-wide (CA): geographic, health status, environment and traffic, socio-economic status, ethnicity
# City-wide (SF): Physical activity (bike sharing, bike station, park, tree, sidewalk)
# Sources:
# https://oehha.ca.gov/calenviroscreen/report/calenviroscreen-30
# https://www2.census.gov/geo/tiger/GENZ2018/shp/
# https://data.sfgov.org/resource/7jbp-yzp3.json (bike sharing)
# https://opendata.arcgis.com/datasets/b8ad067022374acd966db3026ddab6fc_0.geojson (bike rack)
# https://data.sfgov.org/Transportation/Map-of-Sidewalk-Widths/ygcm-bt3x (sidewalk)
# ------- (Geo)Dataframe naming-------
# State level:
# df1: CA geographic data
# df2: CalEnviroScreen 3.0 data, including health outcome (e.g. asthma), environs (e.g. traffic, PM2.5, O3),
# socio-eco status (e.g. poverty, ethnicity), etc
# df3: merge df1 and df2
# City level:
# df5: sidewalk
# df6: bike
# df7: bike sharing
# ------ presentation format -------
# I referenced the following webpage to produce the slideshow for the recorded presentation
# https://medium.com/learning-machine-learning/present-your-data-science-projects-with-jupyter-slides-75f20735eb0f
# import geo data for CA and name it df1
df1 = gpd.read_file('data/cb_2018_06_tract_500k/cb_2018_06_tract_500k.shp')
# display first 3 rows of df1
df1.head(3)
| STATEFP | COUNTYFP | TRACTCE | AFFGEOID | GEOID | NAME | LSAD | ALAND | AWATER | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 06 | 009 | 000300 | 1400000US06009000300 | 06009000300 | 3 | CT | 457009794 | 394122 | POLYGON ((-120.76399 38.21389, -120.76197 38.2... |
| 1 | 06 | 011 | 000300 | 1400000US06011000300 | 06011000300 | 3 | CT | 952744514 | 195376 | POLYGON ((-122.50006 39.12232, -122.50022 39.1... |
| 2 | 06 | 013 | 303102 | 1400000US06013303102 | 06013303102 | 3031.02 | CT | 6507019 | 0 | POLYGON ((-121.72937 37.96884, -121.71409 37.9... |
# plot the geo data
df1.plot(color='none', edgecolor='gray', linewidth=.2, figsize=(5,10))
<AxesSubplot:>
# check the coordinate system
df1.crs
<Geographic 2D CRS: EPSG:4269> Name: NAD83 Axis Info [ellipsoidal]: - Lat[north]: Geodetic latitude (degree) - Lon[east]: Geodetic longitude (degree) Area of Use: - name: North America - NAD83 - bounds: (167.65, 14.92, -47.74, 86.46) Datum: North American Datum 1983 - Ellipsoid: GRS 1980 - Prime Meridian: Greenwich
# import CalEnvScreen 3.0 Data and name it df2, by following the instructions provided by the data source
# https://www.opendatanetwork.com/dataset/www.transparentrichmond.org/wi9v-3g3n
client = Socrata("www.transparentrichmond.org", None)
# Returned as JSON from API
results = client.get("wi9v-3g3n", limit=20000)
# Convert to pandas DataFrame
df2 = pd.DataFrame.from_records(results)
WARNING:root:Requests made without an app_token will be subject to strict throttling limits.
# display data, including health outcome (e.g. asthma), environs (e.g. traffic, PM2.5, O3, diesel PM),
# socio-economic status (e.g. poverty, education level, ethnicity)
df2.head(2)
| the_geom | tract | pop2010 | california | zip | city | longitude | latitude | ciscore | ciscorep | ... | african_am | native_ame | asian_amer | other_pct | objectid_1 | cidecile | civigintil | shape_leng | shape_area | ces2018_rn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | {'type': 'MultiPolygon', 'coordinates': [[[[-1... | 6083002103.0 | 3930 | Santa Barbara | 93454 | Santa Maria | -120.4270588 | 34.9306689 | 29.51 | 59 | ... | 1.9 | 0.5 | 7.2 | 1.6 | 3507 | 6 | 12 | 6999.35762193 | 2847611.28417 | 55-60% |
| 1 | {'type': 'MultiPolygon', 'coordinates': [[[[-1... | 6083002402.0 | 11406 | Santa Barbara | 93455 | Santa Maria | -120.4780833 | 34.9287963 | 33.17 | 65 | ... | 1.4 | 0.2 | 5.5 | 1.6 | 2733 | 7 | 14 | 19100.5780032 | 16352920.1391 | 65-70% |
2 rows × 71 columns
# display all the columns (i.e. variables)
df2.columns
Index(['the_geom', 'tract', 'pop2010', 'california', 'zip', 'city',
'longitude', 'latitude', 'ciscore', 'ciscorep', 'ozone', 'ozonep', 'pm',
'pmp', 'diesel', 'dieselp', 'drink', 'drinkp', 'pest', 'pestp',
'rseihaz', 'rseihazp', 'traffic', 'trafficp', 'cleanups', 'cleanupsp',
'gwthreats', 'gwthreatsp', 'haz', 'hazp', 'iwb', 'iwbp', 'swis',
'swisp', 'pollution', 'pollutions', 'pollutionp', 'asthma', 'asthmap',
'lbw', 'lbwp', 'cvd', 'cvdp', 'edu', 'edup', 'ling', 'lingp', 'pov',
'povp', 'unemp', 'unempp', 'housingb', 'housingbp', 'popchar',
'popcharsco', 'popcharp', 'children_u', 'pop_11_64_', 'elderly_ov',
'hispanic_p', 'white_pct', 'african_am', 'native_ame', 'asian_amer',
'other_pct', 'objectid_1', 'cidecile', 'civigintil', 'shape_leng',
'shape_area', 'ces2018_rn'],
dtype='object')
The variables of interest to this project include geographic data ('tract' and 'zip') as well as those variables within four main dimensions: health status ('asthma','lbw','cvd'), environment and traffic ('traffic','diesel','pm'), socio-economic status ('edu','ling','pov') and ethnicity ('white_pct','african_am','asian_amer'). For each variable employed in this project, the description is listed as follows:
'zip':Postal ZIP Code that the census tract falls within
'asthma': Age-adjusted rate of emergency department visits for asthma.
'lbw': Percent low birth weight
'cvd': Age-adjusted rate of emergency department visits for heart attacks (cardiovascular disease)
'traffic': Traffic density, in vehicle-kilometers per hour per road length, within 150 meters of the census tract boundary
'diesel': Diesel PM emissions from on-road and non-road sources
'pm': Annual mean PM 2.5 concentrations
'edu': Percent of population over 25 with less than a high school education
'ling': Percent limited English speaking households
'pov': Percent of population living below two times the federal poverty level
'white_pct': Percent of white households
'african_am': Percent of african households
'asian_amer': Percent of asian households
The description for other variables can be found via the link: https://oehha.ca.gov/calenviroscreen/report/calenviroscreen-30.
# Extract variables of interest
df2=df2[['tract','zip','pm','traffic', 'diesel','asthma','lbw','cvd','edu','ling','pov','white_pct','african_am','asian_amer']]
# (see the following two lines)
# Found that the two already-imported datasets use different naming for the polygon identifier
df1.GEOID[:5]
0 06009000300 1 06011000300 2 06013303102 3 06013303202 4 06013303203 Name: GEOID, dtype: object
df2.tract[:5]
0 6083002103.0 1 6083002402.0 2 6083002102.0 3 6083002010.0 4 6083002009.0 Name: tract, dtype: object
# For df2, strip the fractional suffix (e.g. ".0") from the tract string
# (e.g. "6083002103.0" -> "6083002103") so it can be matched against df1.GEOID.
# A vectorized string operation does this in one pass; the original wrote
# through df2['tract'].iloc[i] inside a loop, which is chained assignment
# (SettingWithCopyWarning) and may silently fail to modify df2 on newer pandas.
df2['tract'] = df2['tract'].str.split('.').str[0]
df2['tract'][:5]
0 6083002103 1 6083002402 2 6083002102 3 6083002010 4 6083002009 Name: tract, dtype: object
# For df1, remove the first character (i.e. "0")
df1.GEOID=df1.GEOID.map(lambda x: x[1:])
df1['GEOID'][:5]
0 6009000300 1 6011000300 2 6013303102 3 6013303202 4 6013303203 Name: GEOID, dtype: object
# merge the aforementioned two dataset (i.e. df1 and df2) as a new dataframe called df3
df3=pd.merge(df1, df2, left_on='GEOID', right_on='tract')
df3.head(2)
| STATEFP | COUNTYFP | TRACTCE | AFFGEOID | GEOID | NAME | LSAD | ALAND | AWATER | geometry | ... | diesel | asthma | lbw | cvd | edu | ling | pov | white_pct | african_am | asian_amer | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 06 | 009 | 000300 | 1400000US06009000300 | 6009000300 | 3 | CT | 457009794 | 394122 | POLYGON ((-120.76399 38.21389, -120.76197 38.2... | ... | 0.26 | 58.33 | 4.04 | 7.49 | 8.4 | 0.7 | 28.0 | 83.9 | 1.1 | 1.1 |
| 1 | 06 | 011 | 000300 | 1400000US06011000300 | 6011000300 | 3 | CT | 952744514 | 195376 | POLYGON ((-122.50006 39.12232, -122.50022 39.1... | ... | 1.53 | 30.94 | 4.83 | 9.47 | 41.5 | 12.5 | 39.8 | 26.6 | 0.9 | 2.0 |
2 rows × 24 columns
# (See the following two lines)
# make some thematic maps with columns 'traffic density' and 'households living with poverty'
df3["traffic"] = pd.to_numeric(df3["traffic"], downcast="float")
base = df3.plot(color='none', edgecolor='gray', linewidth=.2, figsize=(4,3))
df3.plot(ax=base, column='traffic',legend=True)
# The result is a bit odd, which would be solved afterwards
<AxesSubplot:>
df3["pov"] = pd.to_numeric(df3["pov"], downcast="float")
base = df3.plot(color='none', edgecolor='gray', linewidth=.2, figsize=(4,3))
df3.plot(ax=base, column='pov',legend=True)
# The result makes sense and I make a map for each variable in the next line of code and provide some descriptions.
<AxesSubplot:>
# Plot all variables with choropleth maps: 12 variables within the four
# dimensions (i.e. health, socioeconomic, ethnicity, environs)
# cannot see a clear pattern of traffic density
f, axes = plt.subplots(3, 4, figsize=(30, 16))
f.suptitle("Chloropleth Maps", fontsize=32)
# Cast every mapped variable to float (they arrive from the API as strings)
for col in ["cvd", "lbw", "asthma",                   # health
            "pov", "edu", "ling",                     # socio-economic
            "white_pct", "asian_amer", "african_am",  # ethnicity
            "traffic", "pm", "diesel"]:               # environs
    df3[col] = pd.to_numeric(df3[col], downcast="float")
# One (column, title, colormap) triple per panel, filled column-by-column:
# health | ethnicity | socio-economic | environs
panels = [
    ("cvd", "Cardiovascular Disease", "cet_CET_L8"),
    ("asthma", "Asthma", "cet_CET_L8"),
    ("lbw", "Low birth weight", "cet_CET_L8"),
    ("white_pct", "% White Pop.", "cet_fire"),
    ("asian_amer", "% Asian American Pop.", "cet_fire"),
    ("african_am", "% African American Pop.", "cet_fire"),
    ("pov", "poverty", "cet_CET_D6"),
    ("edu", "# below High School Edu", "cet_CET_D6"),
    ("ling", "Language barrier", "cet_CET_D6"),
    ("traffic", "traffic volume", "cet_CET_D4"),
    ("pm", "PM2.5", "cet_CET_D4"),
    ("diesel", "Diesel PM", "cet_CET_D4"),
]
# axes.T.flat walks the grid column-major, matching the panel order above
for (col, title, cmap), ax in zip(panels, axes.T.flat):
    df3.plot(ax=ax, column=col, legend=True, cmap=cmap)
    ax.set_title(title, fontsize=16)
f = plt.figure(figsize=(18, 6))
f.suptitle("Box Plots", fontsize=24)
# one box plot per variable on a 2x6 grid
box_panels = [
    (df3.cvd, 'Cardiovascular disease'),
    (df3.lbw, 'Low birth weight'),
    (df3.asthma, 'Asthma'),
    (df3.pov, 'poverty'),
    (df3.edu, 'Education level'),
    (df3.ling, 'Linguistic barrier'),
    (df3.white_pct, '% White households'),
    (df3.asian_amer, '% Asian Am. households'),
    (df3.african_am, '% African Am. households'),
    (df3.traffic, 'Traffic density'),
    (df3.diesel, 'Diesel PM'),
    (df3.pm, 'PM2.5 concentration'),
]
for pos, (series, title) in enumerate(box_panels, start=1):
    plt.subplot(2, 6, pos)
    plt.boxplot(series)
    plt.title(title, fontsize=10)
df3['traffic'].describe()
count 8034.000000 mean 936.348999 std 907.578857 min 0.000000 25% 437.057495 50% 695.369995 75% 1184.949982 max 45687.871094 Name: traffic, dtype: float64
# remove the outliers in 'traffic density': keep only observations strictly
# inside the 1st-99th percentile range
low = df3['traffic'].quantile(.01)
high = df3['traffic'].quantile(.99)
high
inside = df3['traffic'].gt(low) & df3['traffic'].lt(high)
df3_f = df3[inside]
df3_f['traffic'].describe()
count 7872.000000 mean 905.971558 std 672.920044 min 64.830002 25% 441.995003 50% 695.369995 75% 1170.357452 max 3671.219971 Name: traffic, dtype: float64
f = plt.figure(figsize=(18, 6))
f.suptitle("Box Plots", fontsize=24)
# same 2x6 grid as before, but traffic now comes from the outlier-filtered df3_f
box_panels = [
    (df3.cvd, 'Cardiovascular disease'),
    (df3.lbw, 'Low birth weight'),
    (df3.asthma, 'Asthma'),
    (df3.pov, 'poverty'),
    (df3.edu, 'Education level'),
    (df3.ling, 'Linguistic barrier'),
    (df3.white_pct, '% White households'),
    (df3.asian_amer, '% Asian Am. households'),
    (df3.african_am, '% African Am. households'),
    (df3_f.traffic, 'Traffic density'),
    (df3.diesel, 'Diesel PM'),
    (df3.pm, 'PM2.5 concentration'),
]
for pos, (series, title) in enumerate(box_panels, start=1):
    plt.subplot(2, 6, pos)
    plt.boxplot(series)
    plt.title(title, fontsize=10)
plt.show()
# another way for descriptive statistics, i.e. histograms for observing data distribution
fig, ax = plt.subplots(2, 6, figsize=(20, 5))
fig.suptitle("Histograms", fontsize=20)
# one histogram per variable, starting from 'pm' (column index 12);
# ax.flat walks the grid row-major, matching the original i/j nesting
for offset, axis in enumerate(ax.flat):
    df3_f.hist(column=df3_f.columns[12 + offset], ax=axis, figsize=(20, 18))
;
''
# We have three geo-levels, i.e. tract, zip, city (SF).
# For data reduction, group by zip code and take the mean of every variable
df3_1 = df3_f.groupby('zip').mean()
df3_1.head()
| ALAND | AWATER | pm | traffic | diesel | asthma | lbw | cvd | edu | ling | pov | white_pct | african_am | asian_amer | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| zip | ||||||||||||||
| 32 | 3.003961e+09 | 20663794.0 | 6.32 | 117.139999 | 0.13 | 27.590000 | 3.12 | 5.72 | 26.0 | 12.1 | 45.500000 | 52.599998 | 0.6 | 0.6 |
| 35 | 3.191027e+09 | 7990907.0 | 4.88 | 87.540001 | 0.12 | 30.620001 | 0.00 | 10.97 | 8.4 | 0.5 | 39.099998 | 89.400002 | 0.6 | 0.4 |
| 40 | 2.163200e+09 | 35550930.0 | 5.26 | 106.870003 | 0.09 | 39.930000 | 6.73 | 11.68 | 10.3 | 0.0 | 35.500000 | 88.000000 | 0.4 | 1.0 |
| 48 | 1.193827e+09 | 10910635.0 | 3.63 | 76.430000 | 0.35 | 34.770000 | 3.59 | 7.17 | 21.0 | 0.8 | 56.000000 | 73.800003 | 1.0 | 0.7 |
| 51 | 1.219176e+09 | 26378126.0 | 6.28 | 154.729996 | 0.20 | 31.760000 | 3.50 | 6.55 | 5.4 | 0.0 | 31.000000 | 87.900002 | 0.2 | 0.7 |
len(df3)
8034
# Reduce data from 8034 to 1334 rows
len(df3_1)
1334
# white people have low linguistic barrier, as expected
# (x/y are passed as keywords: positional data arguments to seaborn's lmplot
# were deprecated in seaborn 0.11 and removed in 0.12)
lm = sns.lmplot(x="white_pct", y="ling", data=df3_1, fit_reg=True, scatter=True, scatter_kws={"s": .1})
lm.fig.set_figwidth(3)
lm.fig.set_figheight(3)
# lower educational level positively correlates with linguistic barrier, as expected
lm = sns.lmplot(x="edu", y="ling", data=df3_1, scatter_kws={"s": .1})
lm.fig.set_figwidth(3)
lm.fig.set_figheight(3)
# diesel PM positively correlates with traffic, one of the main sources of diesel PM
lm = sns.lmplot(x="diesel", y="traffic", data=df3_1, scatter_kws={"s": .1})
lm.fig.set_figwidth(3)
lm.fig.set_figheight(3)
# less affluent households tend to have the health issue of low birth weight
lm = sns.lmplot(x="pov", y="lbw", data=df3_1, scatter_kws={"s": 1})
lm.fig.set_figwidth(3)
lm.fig.set_figheight(3)
# places with more white households tend not to have asthmatic issues
lm = sns.lmplot(x="white_pct", y="asthma", data=df3_1, scatter_kws={"s": 1})
lm.fig.set_figwidth(3)
lm.fig.set_figheight(3)
dfData = df3_1.iloc[:,2:].corr()
plt.subplots(figsize=(8, 8))
sns.heatmap(dfData, annot=True,vmax=1, vmin=-1, square=False, cmap="RdBu", alpha=.9, linewidths=2, linecolor='white', cbar=True)
plt.suptitle('Correlation Analysis', fontsize=20);
dfData = df3_1.iloc[:,5:11].corr()
plt.subplots(figsize=(6, 6))
mask = np.zeros_like(dfData)
mask[np.triu_indices_from(mask)] = True
sns.heatmap(dfData, annot=True,vmax=1, vmin=-1,mask=mask, square=False, cmap="RdBu", alpha=.9, linewidths=2, linecolor='white', cbar=True)
plt.suptitle('Correlation Analysis', fontsize=15);
# tracts with more white households tend not to bear social and health burdens
sns.pairplot(df3_1,vars=["edu","ling","pov",'asthma',"white_pct"],height=1,diag_kind="kde",corner=True,plot_kws=dict(marker="+", linewidth=.1))
<seaborn.axisgrid.PairGrid at 0x1a4e6e85d0>
# Extract data with the label '075' in the column 'COUNTYFP' and display the first two rows.
SF = df3[df3['COUNTYFP']=='075']
SF.head(2)
| STATEFP | COUNTYFP | TRACTCE | AFFGEOID | GEOID | NAME | LSAD | ALAND | AWATER | geometry | ... | diesel | asthma | lbw | cvd | edu | ling | pov | white_pct | african_am | asian_amer | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 526 | 06 | 075 | 010200 | 1400000US06075010200 | 6075010200 | 102 | CT | 515958 | 295388 | POLYGON ((-122.42667 37.80964, -122.42488 37.8... | ... | 73.970001 | 43.160000 | 4.42 | 4.39 | 1.6 | 1.1 | 12.1 | 81.099998 | 0.6 | 10.200000 |
| 527 | 06 | 075 | 011200 | 1400000US06075011200 | 6075011200 | 112 | CT | 177414 | 0 | POLYGON ((-122.41629 37.79389, -122.41522 37.7... | ... | 111.129997 | 27.790001 | 5.13 | 3.71 | 10.6 | 13.1 | 26.0 | 55.099998 | 1.5 | 35.099998 |
2 rows × 24 columns
# There are 195 data points left.
len(SF)
195
# Plot the geodataframe SF
SF.plot(color='none', edgecolor='gray', linewidth=.2, figsize=(5,10))
<AxesSubplot:>
# Make box plots again to make sure no erroneous result
# No outliers found
# NOTE(review): these panels still plot the statewide df3/df3_f series, not the
# SF subset extracted above -- confirm that is intended
plt.figure(figsize=(16, 4))
box_panels = [
    (df3.cvd, 'Cardiovascular disease'),
    (df3.lbw, 'Low birth weight'),
    (df3.asthma, 'Asthma'),
    (df3.pov, 'poverty'),
    (df3.edu, 'Education level'),
    (df3.ling, 'Linguistic barrier'),
    (df3.white_pct, '% White households'),
    (df3.asian_amer, '% Asian Am. households'),
    (df3.african_am, '% African Am. households'),
    (df3_f.traffic, 'Traffic density'),
    (df3.diesel, 'Diesel vehicle'),
    (df3.pm, 'PM2.5 concentration'),
]
for pos, (series, title) in enumerate(box_panels, start=1):
    plt.subplot(2, 6, pos)
    plt.boxplot(series)
    plt.title(title, fontsize=10)
dfData = SF[['pm','traffic','diesel','asthma','lbw','cvd','edu','ling','pov','white_pct','african_am','asian_amer']].corr()
plt.subplots(figsize=(8,8))
sns.heatmap(dfData, annot=True, vmax=1, square=False, cmap="RdBu", alpha=.9, linewidths=2, linecolor='white', cbar=True)
plt.show()
dfData = SF[['edu','ling','pov','white_pct','asian_amer','african_am']].corr()
plt.subplots(figsize=(4, 4))
sns.heatmap(dfData, annot=True, vmax=1, square=False, cmap="RdBu", alpha=.9, linewidths=2, linecolor='white', cbar=True)
plt.show()
f, axes = plt.subplots(3, 4, figsize=(30, 16))
f.suptitle("Chloropleth Maps", fontsize=32)
# (the commented-out to_numeric block that used to sit here is gone: the
# columns were already converted to float on df3, from which SF was sliced)
# One (column, title, colormap) triple per panel, filled column-by-column:
# health | socio-economic | ethnicity | environs
panels = [
    ("cvd", "Cardiovascular Disease", "cet_fire"),
    ("asthma", "Asthma", "cet_fire"),
    ("lbw", "Low birth weight", "cet_fire"),
    ("pov", "poverty", "cet_CET_L17_r"),
    ("edu", "# below High School Edu", "cet_CET_L17_r"),
    ("ling", "Language barrier", "cet_CET_L17_r"),
    ("white_pct", "% White Pop.", "copper"),
    ("asian_amer", "% Asian American Pop.", "copper"),
    ("african_am", "% African American Pop.", "copper"),
    ("traffic", "traffic volume", "cet_CET_D6_r"),
    ("pm", "PM2.5", "cet_CET_D6_r"),
    ("diesel", "Diesel cars", "cet_CET_D6_r"),
]
# axes.T.flat walks the grid column-major, matching the panel order above
for (col, title, cmap), ax in zip(panels, axes.T.flat):
    SF.plot(ax=ax, column=col, legend=True, cmap=cmap)
    ax.set_title(title, fontsize=16)
# load sidewalk data
df5 = pd.read_csv("data/MTA.sidewalk_widths.csv")
df5.head(2)
| CNN | STREET | ST_TYPE | CLASS | WIDTH_MIN | WIDTH_RECO | SIDEWALK_F | SIDE | shape | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 105000.0 | 01ST | ST | Downtown Commercial | 10.0 | 12.0 | 15.0 | W | LINESTRING (-122.39702 37.78933, -122.39656 37... |
| 1 | 104002.0 | 01ST | ST | Downtown Commercial | 10.0 | 12.0 | 15.0 | W | LINESTRING (-122.397255 37.789516, -122.39702 ... |
# 16174 rows in total
len(df5)
16174
# the shape column is not really the "shape" for geopandas
type(df5['shape'][0])
str
# try to transform the string type to shapely.geometry.linestring type
# print only one result for checking
shapely.wkt.loads(df5['shape'][0])
# transforming the data type for all rows
df5['shape_'] = df5['shape'].apply(lambda x: shapely.wkt.loads(x))
df5.head(3)
| CNN | STREET | ST_TYPE | CLASS | WIDTH_MIN | WIDTH_RECO | SIDEWALK_F | SIDE | shape | shape_ | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 105000.0 | 01ST | ST | Downtown Commercial | 10.0 | 12.0 | 15.0 | W | LINESTRING (-122.39702 37.78933, -122.39656 37... | LINESTRING (-122.39702 37.78933, -122.39656 37... |
| 1 | 104002.0 | 01ST | ST | Downtown Commercial | 10.0 | 12.0 | 15.0 | W | LINESTRING (-122.397255 37.789516, -122.39702 ... | LINESTRING (-122.397255 37.789516, -122.39702 ... |
| 2 | 104001.0 | 01ST | ST | Downtown Commercial | 10.0 | 12.0 | 15.0 | W | LINESTRING (-122.39756 37.789757, -122.397255 ... | LINESTRING (-122.39756 37.789757, -122.397255 ... |
# transforming dataframe (df5) to GEOdataframe (gdf5)
gdf5 = gpd.GeoDataFrame(df5, crs=df1.crs, geometry=df5['shape_'])
gdf5.head(2)
| CNN | STREET | ST_TYPE | CLASS | WIDTH_MIN | WIDTH_RECO | SIDEWALK_F | SIDE | shape | shape_ | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 105000.0 | 01ST | ST | Downtown Commercial | 10.0 | 12.0 | 15.0 | W | LINESTRING (-122.39702 37.78933, -122.39656 37... | LINESTRING (-122.39702 37.78933, -122.39656 37... | LINESTRING (-122.39702 37.78933, -122.39656 37... |
| 1 | 104002.0 | 01ST | ST | Downtown Commercial | 10.0 | 12.0 | 15.0 | W | LINESTRING (-122.397255 37.789516, -122.39702 ... | LINESTRING (-122.397255 37.789516, -122.39702 ... | LINESTRING (-122.39726 37.78952, -122.39702 37... |
# displaying the sidewalk width range
gdf5['SIDEWALK_F'].describe()
count 16174.000000 mean 8.652776 std 30.897292 min -3.000000 25% 0.000000 50% 10.000000 75% 15.000000 max 1915.000000 Name: SIDEWALK_F, dtype: float64
# keep only sidewalks with width SIDEWALK_F >= 15 (at or above the 75th percentile)
base = SF.plot(color='none', edgecolor='r', linewidth=.1, figsize=(6,6))
gdf5[gdf5['SIDEWALK_F']>=15].plot(ax=base,edgecolor='gray', linewidth=.2, figsize=(5,5), legend=True)
<AxesSubplot:>
# intersect SF (main dataset) with gdf5 (sidewalk data) and create a new geodataframe called sidewalk
sidewalk=gpd.overlay(gdf5[gdf5['SIDEWALK_F']>=15], SF, how='intersection')
# now every sidewalk data point has a reference column for "groupby"ing back
sidewalk.head(1)
| CNN | STREET | ST_TYPE | CLASS | WIDTH_MIN | WIDTH_RECO | SIDEWALK_F | SIDE | shape | shape_ | ... | asthma | lbw | cvd | edu | ling | pov | white_pct | african_am | asian_amer | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 105000.0 | 01ST | ST | Downtown Commercial | 10.0 | 12.0 | 15.0 | W | LINESTRING (-122.39702 37.78933, -122.39656 37... | LINESTRING (-122.39702 37.78933, -122.39656 37... | ... | 21.389999 | 5.85 | 3.79 | 4.6 | 8.7 | 19.0 | 54.700001 | 3.7 | 30.9 | LINESTRING (-122.39702 37.78933, -122.39656 37... |
1 rows × 34 columns
# calculating sidewalk length and storing the values in a new column called
# sidewalk_length. Lengths are computed in a projected CRS (UTM zone 10N,
# EPSG:26910, units of meters): calling .length on a geographic (lat/lon) CRS
# yields incorrect results, as the geopandas UserWarning for the original
# version pointed out.
sidewalk['sidewalk_length'] = sidewalk.to_crs(epsg=26910).length
/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:1: UserWarning: Geometry is in a geographic CRS. Results from 'length' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. """Entry point for launching an IPython kernel.
# "groupby"ing back using "TRACTCE"
s=sidewalk.groupby('TRACTCE').sum()
len(SF)
195
# there are 16 census tracts without any sidewalk, because there should be 195 rows
len(s)
179
# Merge only the column 'sidewalk_length' back to SF
SF = SF.merge(s['sidewalk_length'],how='left', left_on='TRACTCE', right_index=True)
SF.plot(column='sidewalk_length',legend=True)
<AxesSubplot:>
# Show the null values in the 'sidewalk_length' column
SF[SF['sidewalk_length'].isnull()].head(2)
| STATEFP | COUNTYFP | TRACTCE | AFFGEOID | GEOID | NAME | LSAD | ALAND | AWATER | geometry | ... | asthma | lbw | cvd | edu | ling | pov | white_pct | african_am | asian_amer | sidewalk_length | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 528 | 06 | 075 | 012401 | 1400000US06075012401 | 6075012401 | 124.01 | CT | 91972 | 0 | POLYGON ((-122.41771 37.78424, -122.41607 37.7... | ... | 88.760002 | 5.63 | 6.05 | 31.400000 | 28.700001 | 71.199997 | 24.5 | 9.8 | 25.400000 | NaN |
| 532 | 06 | 075 | 017801 | 1400000US06075017801 | 6075017801 | 178.01 | CT | 214052 | 0 | POLYGON ((-122.40493 37.78150, -122.40271 37.7... | ... | 68.430000 | 7.48 | 7.92 | 36.299999 | 55.599998 | 58.299999 | 25.6 | 3.2 | 64.599998 | NaN |
2 rows × 25 columns
# fill up the null values with zeros. Assign the result back instead of using
# inplace=True on a column selection, which is chained assignment (pandas warns
# about it and it can silently fail to modify SF on newer versions).
SF['sidewalk_length'] = SF['sidewalk_length'].fillna(0)
# check the null values have been filled with zeros
SF[SF['sidewalk_length'].isnull()]
| STATEFP | COUNTYFP | TRACTCE | AFFGEOID | GEOID | NAME | LSAD | ALAND | AWATER | geometry | ... | asthma | lbw | cvd | edu | ling | pov | white_pct | african_am | asian_amer | sidewalk_length |
|---|
0 rows × 25 columns
# display the data again
# found that sidewalk length is greater in western part of SF
SF.plot(column='sidewalk_length',legend=True)
<AxesSubplot:>
# load bike rack data
# looking at the json format for how I should extract data I need
url = "https://data.sfgov.org/resource/hn4j-6fx5.geojson"
response = requests.get(url)
results = response.text
data = json.loads(results)
data['features'][0]
{'type': 'Feature',
'geometry': {'type': 'Point', 'coordinates': [-122.40533, 37.788067]},
'properties': {':@computed_region_6qbp_sg9q': '19',
':@computed_region_h4ep_8xdi': None,
'location': 'Hermes',
':@computed_region_26cr_cadq': '3',
'objectid': '12163',
'globalid': None,
':@computed_region_ajp5_b2md': '8',
'placement': 'SIDEWALK',
':@computed_region_qgnn_b9vv': '6',
'install_yr': '2004',
'install_mo': '0',
'address': '125 GRANT AVE',
'lon': '-122.405341',
'street': 'GRANT',
'racks': '1',
':@computed_region_jwn9_ihcz': '19',
':@computed_region_9jxd_iqea': '5',
':@computed_region_696y_nzzn': '43',
'spaces': '2',
'lat': '37.788072'}}
# Extract lon/lat coordinates of bike racks.
# Build the frame from the coordinate list in one pass: assigning row-by-row
# with df6.loc[i] inside a loop re-indexes the frame on every iteration (O(n^2)).
coords = [feat['geometry']['coordinates'] for feat in data['features']]
df6 = pd.DataFrame(coords, columns=['lon', 'lat'])
# display the first rows (note the call parentheses: the original `df6.head`
# displayed the bound method repr instead of the frame)
df6.head()
<bound method NDFrame.head of lon lat 0 -122.405330 37.788067 1 -122.420220 37.788628 2 -122.405640 37.789780 3 -122.392520 37.793137 4 -122.440060 37.729187 .. ... ... 995 -122.417050 37.764256 996 -122.429405 37.767498 997 -122.419010 37.758390 998 -122.412610 37.791610 999 -122.442170 37.752163 [1000 rows x 2 columns]>
# display the dataframe
shape = [Point(xy) for xy in zip(df6['lon'],df6['lat'])]
gdf6 = gpd.GeoDataFrame(df6, crs=df1.crs, geometry=shape)
gdf6.head()
| lon | lat | geometry | |
|---|---|---|---|
| 0 | -122.40533 | 37.788067 | POINT (-122.40533 37.78807) |
| 1 | -122.42022 | 37.788628 | POINT (-122.42022 37.78863) |
| 2 | -122.40564 | 37.789780 | POINT (-122.40564 37.78978) |
| 3 | -122.39252 | 37.793137 | POINT (-122.39252 37.79314) |
| 4 | -122.44006 | 37.729187 | POINT (-122.44006 37.72919) |
# plot the bike rack data
base = SF.plot(color='none', edgecolor='gray', linewidth=.1, figsize=(5,5))
gdf6.plot(ax=base, color='grey', markersize=.1)
<AxesSubplot:>
# using spatial join and groupby operation to prepare data
# counting bike rack numbers within each census tract
c=gpd.sjoin(SF, gdf6).groupby('GEOID').count()
# Randomly pick 'STATEFP' and store the count value to a new column called 'bikerack_count'
c['bikerack_count']=c['STATEFP']
c.head(1)
| STATEFP | COUNTYFP | TRACTCE | AFFGEOID | NAME | LSAD | ALAND | AWATER | geometry | tract | ... | ling | pov | white_pct | african_am | asian_amer | sidewalk_length | index_right | lon | lat | bikerack_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| GEOID | |||||||||||||||||||||
| 6075010100 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | ... | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |
1 rows × 28 columns
# merge the 'bikerack_count' and SF main dataset
SF=SF.merge(c['bikerack_count'],how='left',on='GEOID')
# fill up null values with zero to represent no bike rack found
# (assign back rather than inplace=True on a column selection, which pandas
# flags as chained assignment)
SF['bikerack_count'] = SF['bikerack_count'].fillna(0)
# found that bike racks concentrate in the northeastern part of SF
SF.plot(column='bikerack_count')
<AxesSubplot:>
# load bikesharing station data
url="https://data.sfgov.org/resource/7jbp-yzp3.json"
response = requests.get(url)
results = response.text
# data = json.loads(results)
data= pd.read_json(results)
# The API may return lat/lon as strings; cast both columns to float with
# astype (vectorized) instead of mapping a lambda over every value, then
# build point geometries and a GeoDataFrame in the same CRS as the tracts.
lons = data.station_longitude.astype(float)
lats = data.station_latitude.astype(float)
shape = [Point(xy) for xy in zip(lons, lats)]
gdf7 = gpd.GeoDataFrame(data, crs=df1.crs, geometry=shape)
gdf7.head(2)
| station_id | station_name | station_id_domo | has_kiosk | dock_count | station_latitude | station_longitude | region_id | shape | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 17 | Embarcadero BART Station (Beale St at Market St) | SF-E29-3 | true | 29 | 37.792251 | -122.397086 | 3.0 | POINT (-122.39707 37.792248) | POINT (-122.39709 37.79225) |
| 1 | 30 | San Francisco Caltrain (Townsend St at 4th St) | SF-J29 | true | 19 | 37.776598 | -122.395282 | 3.0 | POINT (-122.39527 37.776592) | POINT (-122.39528 37.77660) |
# Drop stations with a zero latitude (missing/placeholder coordinates)
gdf7 = gdf7[gdf7.station_latitude != 0]
# Bike-sharing stations concentrate in northeastern SF, much like the rack data
base = SF.plot(figsize=(4, 4), color='none', edgecolor='gray', linewidth=0.1)
sf_stations = gdf7[gdf7["station_id_domo"].str.contains('SF')]
sf_stations.plot(ax=base, markersize=0.5)
<AxesSubplot:>
# Distance from each tract geometry to the closest SF bike-sharing station.
# Compute in a projected CRS (EPSG:3857, metres): calling .distance() on a
# geographic CRS yields distorted degree-based values — the UserWarning the
# original code emitted said exactly this.
# NOTE(review): values are now metres instead of degrees, so downstream
# regression coefficients on this column rescale accordingly.
_sf_stations_3857 = gdf7[gdf7["station_id_domo"].str.contains('SF')].to_crs("EPSG:3857")
SF['min_dist_to_bs'] = SF.geometry.to_crs("EPSG:3857").apply(
    lambda g: _sf_stations_3857.distance(g).min())
/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:2: UserWarning: Geometry is in a geographic CRS. Results from 'distance' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
SF.head(2)
| STATEFP | COUNTYFP | TRACTCE | AFFGEOID | GEOID | NAME | LSAD | ALAND | AWATER | geometry | ... | cvd | edu | ling | pov | white_pct | african_am | asian_amer | sidewalk_length | bikerack_count | min_dist_to_bs | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 06 | 075 | 010200 | 1400000US06075010200 | 6075010200 | 102 | CT | 515958 | 295388 | POLYGON ((-122.42667 37.80964, -122.42488 37.8... | ... | 4.39 | 1.6 | 1.1 | 12.1 | 81.099998 | 0.6 | 10.200000 | 0.035729 | 2.0 | 0.00000 |
| 1 | 06 | 075 | 011200 | 1400000US06075011200 | 6075011200 | 112 | CT | 177414 | 0 | POLYGON ((-122.41629 37.79389, -122.41522 37.7... | ... | 3.71 | 10.6 | 13.1 | 26.0 | 55.099998 | 1.5 | 35.099998 | 0.008231 | 1.0 | 0.00264 |
2 rows × 27 columns
# display the result: accessibility (distance) to the nearest bike-sharing station
SF.plot(column='min_dist_to_bs',legend=True)
<AxesSubplot:>
SF.crs
<Geographic 2D CRS: EPSG:4269> Name: NAD83 Axis Info [ellipsoidal]: - Lat[north]: Geodetic latitude (degree) - Lon[east]: Geodetic longitude (degree) Area of Use: - name: North America - NAD83 - bounds: (167.65, 14.92, -47.74, 86.46) Datum: North American Datum 1983 - Ellipsoid: GRS 1980 - Prime Meridian: Greenwich
# reproject to web mercator (EPSG:3857, metres) for centroid math and basemap overlay
SF_3857 = SF.to_crs("EPSG:3857")
SF_3857.crs
<Projected CRS: EPSG:3857> Name: WGS 84 / Pseudo-Mercator Axis Info [cartesian]: - X[east]: Easting (metre) - Y[north]: Northing (metre) Area of Use: - name: World - 85°S to 85°N - bounds: (-180.0, -85.06, 180.0, 85.06) Coordinate Operation: - name: Popular Visualisation Pseudo-Mercator - method: Popular Visualisation Pseudo Mercator Datum: World Geodetic System 1984 - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
# Store projected centroid coordinates in new 'x'/'y' columns. Compute the
# centroids once and use the GeoSeries .x/.y accessors instead of mapping a
# lambda over every point (clearer, and avoids recomputing the centroids).
centroids = SF_3857.centroid
SF_3857["x"] = centroids.x
SF_3857["y"] = centroids.y
SF_3857.head(2)
| STATEFP | COUNTYFP | TRACTCE | AFFGEOID | GEOID | NAME | LSAD | ALAND | AWATER | geometry | ... | ling | pov | white_pct | african_am | asian_amer | sidewalk_length | bikerack_count | min_dist_to_bs | x | y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 06 | 075 | 010200 | 1400000US06075010200 | 6075010200 | 102 | CT | 515958 | 295388 | POLYGON ((-13628474.780 4552569.154, -13628274... | ... | 1.1 | 12.1 | 81.099998 | 0.6 | 10.200000 | 0.035729 | 2.0 | 0.00000 | -1.362797e+07 | 4.551968e+06 |
| 1 | 06 | 075 | 011200 | 1400000US06075011200 | 6075011200 | 112 | CT | 177414 | 0 | POLYGON ((-13627319.401 4550350.608, -13627199... | ... | 13.1 | 26.0 | 55.099998 | 1.5 | 35.099998 | 0.008231 | 1.0 | 0.00264 | -1.362692e+07 | 4.550221e+06 |
2 rows × 29 columns
# Tract polygons (white fill, black edges) with their centroids marked in black
fig, ax = plt.subplots(figsize=(6, 6))
SF_3857.plot(ax=ax, edgecolor='black', facecolor='white')
SF_3857.centroid.plot(ax=ax, c='black', markersize=10)
plt.show()
# Choropleth quantile maps (k=5) of every study variable in a 3x5 grid:
# columns are health, environment, socio-economic, ethnicity, physical activity.
# The fifteen copy-pasted plot stanzas of the original are replaced by a single
# data-driven loop over (column, axis, title) tuples — identical panels, far
# less duplication.
f,(ax1,ax2,ax3) = plt.subplots(3,5,figsize=(32,16))
panels = [
    # Health
    ('asthma', ax1[0], "Asthma"),
    ('cvd', ax2[0], "Cardiovascular disease"),
    ('lbw', ax3[0], "Low birth weight"),
    # Env
    ('diesel', ax1[1], "Diesel cars"),
    ('traffic', ax2[1], "Traffic density"),
    ('pm', ax3[1], "PM2.5"),
    # Socioeconomic
    ('edu', ax1[2], "# people with education level below highschool"),
    ('ling', ax2[2], "Linguistic barrier"),
    ('pov', ax3[2], "Poverty"),
    # Ethnicity
    ('white_pct', ax1[3], "% white households"),
    ('asian_amer', ax2[3], "% asian am households"),
    ('african_am', ax3[3], "% african am households"),
    # Physical activity
    ('sidewalk_length', ax1[4], "Sidewalk length"),
    ('bikerack_count', ax2[4], "# bike racks"),
    ('min_dist_to_bs', ax3[4], "Accssibility to bikesharing station"),
]
for col, ax, title in panels:
    SF_3857.plot(column=col, legend=True, ax=ax,
                 scheme='quantiles', k=5, cmap='GnBu', edgecolor='black')
    ax.set_title(title, fontsize=16)
    # hide axis frames and draw a light basemap behind each panel
    ax.axis('off')
    cx.add_basemap(ax, crs=SF_3857.crs.to_string(),
                   source=cx.providers.Stamen.TonerLite)
/opt/anaconda3/lib/python3.7/site-packages/mapclassify/classifiers.py:236: UserWarning: Warning: Not enough unique values in array to form k classes
"Warning: Not enough unique values in array to form k classes", UserWarning
/opt/anaconda3/lib/python3.7/site-packages/mapclassify/classifiers.py:238: UserWarning: Warning: setting k to 1
Warn("Warning: setting k to %d" % k_q, UserWarning)
/opt/anaconda3/lib/python3.7/site-packages/mapclassify/classifiers.py:236: UserWarning: Warning: Not enough unique values in array to form k classes
"Warning: Not enough unique values in array to form k classes", UserWarning
/opt/anaconda3/lib/python3.7/site-packages/mapclassify/classifiers.py:238: UserWarning: Warning: setting k to 4
Warn("Warning: setting k to %d" % k_q, UserWarning)
# Pairwise correlations among pollution, health, and physical-activity variables
corr_cols = ['traffic', 'diesel', 'pm', 'asthma', 'lbw', 'cvd',
             'sidewalk_length', 'bikerack_count', 'min_dist_to_bs']
dfData = SF[corr_cols].corr()
plt.subplots(figsize=(6, 6))
sns.heatmap(dfData, annot=True, vmax=1, square=False, cmap="RdBu",
            alpha=.9, linewidths=2, linecolor='white', cbar=True)
plt.show()
# First model - OLS of asthma on all 12 candidate predictors, no interaction
# terms. dmatrices builds the response (y) and intercept-augmented design
# matrix (X) from the patsy formula against the tract-level dataframe.
y, X = dmatrices('asthma ~ pm + traffic + diesel +edu +ling + pov + white_pct + african_am + asian_amer +sidewalk_length + bikerack_count + min_dist_to_bs',
                data=SF_3857, return_type='dataframe')
mod = sm.OLS(y, X)
res = mod.fit()
# keep residuals / fitted values / observed response around for diagnostics
residuals = res.resid
predicted = res.fittedvalues
observed = y
print(res.summary())
OLS Regression Results
==============================================================================
Dep. Variable: asthma R-squared: 0.685
Model: OLS Adj. R-squared: 0.665
Method: Least Squares F-statistic: 33.03
Date: Fri, 11 Dec 2020 Prob (F-statistic): 1.90e-39
Time: 12:22:27 Log-Likelihood: -817.60
No. Observations: 195 AIC: 1661.
Df Residuals: 182 BIC: 1704.
Df Model: 12
Covariance Type: nonrobust
===================================================================================
coef std err t P>|t| [0.025 0.975]
-----------------------------------------------------------------------------------
Intercept 68.5708 53.229 1.288 0.199 -36.455 173.597
pm 0.4203 5.834 0.072 0.943 -11.091 11.932
traffic 0.0025 0.003 0.945 0.346 -0.003 0.008
diesel 0.2006 0.064 3.143 0.002 0.075 0.327
edu 0.3906 0.301 1.300 0.195 -0.202 0.984
ling -0.3723 0.285 -1.308 0.193 -0.934 0.189
pov 0.0130 0.145 0.089 0.929 -0.273 0.299
white_pct -0.7298 0.161 -4.535 0.000 -1.047 -0.412
african_am 0.9759 0.214 4.558 0.000 0.553 1.398
asian_amer -0.4839 0.159 -3.048 0.003 -0.797 -0.171
sidewalk_length -70.5948 53.424 -1.321 0.188 -176.004 34.815
bikerack_count 0.3385 0.191 1.775 0.078 -0.038 0.715
min_dist_to_bs -748.8162 414.108 -1.808 0.072 -1565.886 68.254
==============================================================================
Omnibus: 44.648 Durbin-Watson: 1.891
Prob(Omnibus): 0.000 Jarque-Bera (JB): 94.490
Skew: 1.062 Prob(JB): 3.03e-21
Kurtosis: 5.668 Cond. No. 3.25e+05
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 3.25e+05. This might indicate that there are
strong multicollinearity or other numerical problems.
# Interaction terms: diesel exposure crossed with each bike-facility measure
SF_3857['bike_diesel_rack'] = SF_3857['diesel'].mul(SF_3857['bikerack_count'])
SF_3857['bike_diesel_bshare'] = SF_3857['diesel'].mul(SF_3857['min_dist_to_bs'])
# Second model - 14 predictors: the 12 from model one plus the two
# diesel x bike-facility interaction terms created above.
y, X = dmatrices('asthma ~ pm + traffic + diesel +edu +ling + pov + white_pct + african_am + asian_amer +sidewalk_length + bikerack_count + min_dist_to_bs + bike_diesel_rack + bike_diesel_bshare',
                data=SF_3857, return_type='dataframe')
mod = sm.OLS(y, X)
res = mod.fit()
# keep residuals / fitted values / observed response around for diagnostics
residuals = res.resid
predicted = res.fittedvalues
observed = y
print(res.summary())
OLS Regression Results
==============================================================================
Dep. Variable: asthma R-squared: 0.696
Model: OLS Adj. R-squared: 0.672
Method: Least Squares F-statistic: 29.41
Date: Fri, 11 Dec 2020 Prob (F-statistic): 3.32e-39
Time: 12:22:27 Log-Likelihood: -814.31
No. Observations: 195 AIC: 1659.
Df Residuals: 180 BIC: 1708.
Df Model: 14
Covariance Type: nonrobust
======================================================================================
coef std err t P>|t| [0.025 0.975]
--------------------------------------------------------------------------------------
Intercept 71.0737 52.690 1.349 0.179 -32.896 175.043
pm -0.6830 5.789 -0.118 0.906 -12.107 10.741
traffic 0.0026 0.003 0.972 0.333 -0.003 0.008
diesel 0.3152 0.080 3.938 0.000 0.157 0.473
edu 0.3331 0.298 1.117 0.266 -0.256 0.922
ling -0.4003 0.282 -1.418 0.158 -0.957 0.157
pov -0.0196 0.144 -0.136 0.892 -0.304 0.265
white_pct -0.7500 0.160 -4.702 0.000 -1.065 -0.435
african_am 1.0210 0.213 4.794 0.000 0.601 1.441
asian_amer -0.4255 0.159 -2.679 0.008 -0.739 -0.112
sidewalk_length -84.4573 53.291 -1.585 0.115 -189.612 20.698
bikerack_count 1.7988 0.644 2.795 0.006 0.529 3.069
min_dist_to_bs 589.5027 1233.198 0.478 0.633 -1843.882 3022.888
bike_diesel_rack -0.0177 0.007 -2.411 0.017 -0.032 -0.003
bike_diesel_bshare -31.3196 28.579 -1.096 0.275 -87.712 25.072
==============================================================================
Omnibus: 37.976 Durbin-Watson: 1.920
Prob(Omnibus): 0.000 Jarque-Bera (JB): 65.281
Skew: 1.001 Prob(JB): 6.68e-15
Kurtosis: 5.006 Cond. No. 1.08e+06
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.08e+06. This might indicate that there are
strong multicollinearity or other numerical problems.
# Third model - the reduced specification kept after backward stepwise
# elimination of non-significant predictors from model two.
y, X = dmatrices('asthma ~diesel + white_pct + african_am + asian_amer + bikerack_count + bike_diesel_rack',
                data=SF_3857, return_type='dataframe')
mod = sm.OLS(y, X)
res = mod.fit()
# keep residuals / fitted values / observed response around for diagnostics
residuals = res.resid
predicted = res.fittedvalues
observed = y
print(res.summary())
OLS Regression Results
==============================================================================
Dep. Variable: asthma R-squared: 0.680
Model: OLS Adj. R-squared: 0.670
Method: Least Squares F-statistic: 66.61
Date: Fri, 11 Dec 2020 Prob (F-statistic): 6.35e-44
Time: 12:22:27 Log-Likelihood: -819.21
No. Observations: 195 AIC: 1652.
Df Residuals: 188 BIC: 1675.
Df Model: 6
Covariance Type: nonrobust
====================================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------------
Intercept 73.6118 9.983 7.374 0.000 53.919 93.305
diesel 0.3394 0.055 6.184 0.000 0.231 0.448
white_pct -0.8715 0.107 -8.115 0.000 -1.083 -0.660
african_am 0.9537 0.189 5.050 0.000 0.581 1.326
asian_amer -0.6537 0.118 -5.560 0.000 -0.886 -0.422
bikerack_count 1.6338 0.620 2.633 0.009 0.410 2.858
bike_diesel_rack -0.0163 0.007 -2.305 0.022 -0.030 -0.002
==============================================================================
Omnibus: 43.827 Durbin-Watson: 1.917
Prob(Omnibus): 0.000 Jarque-Bera (JB): 84.189
Skew: 1.089 Prob(JB): 5.23e-19
Kurtosis: 5.370 Cond. No. 6.36e+03
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 6.36e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
# Sanity-check the CRS, then reset to a clean 0..n-1 positional index:
# the kernel-weights builder below pairs weights with rows by position.
SF_3857.crs
SF_3857.reset_index(drop=True, inplace=True)
SF_3857.head(2)
# index reset above prepares the frame for calculating the kernel weights
| STATEFP | COUNTYFP | TRACTCE | AFFGEOID | GEOID | NAME | LSAD | ALAND | AWATER | geometry | ... | white_pct | african_am | asian_amer | sidewalk_length | bikerack_count | min_dist_to_bs | x | y | bike_diesel_rack | bike_diesel_bshare | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 06 | 075 | 010200 | 1400000US06075010200 | 6075010200 | 102 | CT | 515958 | 295388 | POLYGON ((-13628474.780 4552569.154, -13628274... | ... | 81.099998 | 0.6 | 10.200000 | 0.035729 | 2.0 | 0.00000 | -1.362797e+07 | 4.551968e+06 | 147.940002 | 0.000000 |
| 1 | 06 | 075 | 011200 | 1400000US06075011200 | 6075011200 | 112 | CT | 177414 | 0 | POLYGON ((-13627319.401 4550350.608, -13627199... | ... | 55.099998 | 1.5 | 35.099998 | 0.008231 | 1.0 | 0.00264 | -1.362692e+07 | 4.550221e+06 | 111.129997 | 0.293383 |
2 rows × 31 columns
# Adaptive Gaussian kernel spatial weights, bandwidth set by the 5 nearest
# neighbours of each tract (fixed=False makes the bandwidth adaptive)
w = ps.weights.Kernel.from_dataframe(SF_3857, k=5, fixed=False, function='gaussian')
plot_spatial_weights(w, SF_3857, figsize=(5,5));
# Alternative contiguity-based (Queen) weights, kept here for comparison:
# w = ps.weights.Queen.from_dataframe(SF_3857)
# plot_spatial_weights(w, SF_3857, figsize=(5,5));
# Global Moran's I for asthma under the kernel weights.
# Only the response variable is needed here — pull the asthma column
# directly instead of building (and discarding) a full patsy design matrix.
y = SF_3857[['asthma']]
moran = esda.Moran(y, w)
moran.I
0.8232597476169629
# Moran scatterplot of the global statistic.
# An I of ~0.82 indicates strong positive spatial autocorrelation in asthma.
fig, ax = moran_scatterplot(moran, aspect_equal=True)
plt.show()
# Local Moran's I (LISA) per tract; points significant at p=0.05 are highlighted.
moran_loc = esda.moran.Moran_Local(y, w)
# moran_scatterplot is the same splot.esda function imported at the top of the file
fig, ax = moran_scatterplot(moran_loc, p=0.05)
ax.set_ylabel('Spatial Lag of Asthma')
ax.set_xlabel('Asthma')
plt.show()
# Map the LISA clusters (p < 0.01): two hotspots and one large coldspot
# emerge, closely mirroring the residential patterns of white and Asian households
moran_loc = esda.moran.Moran_Local(y,w)
# simpler cluster-only map, kept for reference:
# splot.esda.lisa_cluster(moran_loc, SF_3857, p=0.05, alpha=.9, figsize = (5,5))
splot.esda.plot_local_autocorrelation(moran_loc, SF_3857, 'asthma', p=0.01, quadrant=1)
plt.show()
# Prepare GWR inputs: response column, covariate matrix (the model-three
# predictors), and projected centroid coordinates for each tract
g_y = SF_3857['asthma'].values.reshape((-1, 1))
g_X = SF_3857[['diesel', 'white_pct', 'african_am', 'asian_amer', 'bikerack_count','bike_diesel_rack']].values
u = SF_3857['x']
v = SF_3857['y']
g_coords = list(zip(u, v))
# Calibrate a GWR model on the SF dataset using a computationally selected
# bandwidth (Sel_BW optimizes an information criterion such as AICc)
gwr_selector = Sel_BW(g_coords, g_y, g_X)
gwr_bw = gwr_selector.search()
print('Computationally selected bandwidth: ',gwr_bw)
gwr_model = GWR(g_coords, g_y, g_X, gwr_bw)
gwr_results = gwr_model.fit()
print('Residual sum of squares: ', gwr_results.resid_ss)
Computationally selected bandwidth: 83.0 Residual sum of squares: 34152.95836313484
# Full GWR summary (global fit, bandwidth, local parameter distributions);
# covariate key for the table that follows:
gwr_results.summary();
# X1:'diesel', X2:'white_pct', X3:'african_am', X4:'asian_amer', X5:'bikerack_count', X6:'bike_diesel_rack'
=========================================================================== Model type Gaussian Number of observations: 195 Number of covariates: 7 Global Regression Results --------------------------------------------------------------------------- Residual sum of squares: 50885.124 Log-likelihood: -819.215 AIC: 1652.430 AICc: 1655.204 BIC: 49893.800 R2: 0.680 Adj. R2: 0.670 Variable Est. SE t(Est/SE) p-value ------------------------------- ---------- ---------- ---------- ---------- X0 73.612 9.983 7.374 0.000 X1 0.339 0.055 6.184 0.000 X2 -0.871 0.107 -8.115 0.000 X3 0.954 0.189 5.050 0.000 X4 -0.654 0.118 -5.560 0.000 X5 1.634 0.620 2.633 0.008 X6 -0.016 0.007 -2.305 0.021 Geographically Weighted Regression (GWR) Results --------------------------------------------------------------------------- Spatial kernel: Adaptive bisquare Bandwidth used: 83.000 Diagnostic information --------------------------------------------------------------------------- Residual sum of squares: 34152.958 Effective number of parameters (trace(S)): 28.079 Degree of freedom (n - trace(S)): 166.921 Sigma estimate: 14.304 Log-likelihood: -780.339 AIC: 1618.837 AICc: 1629.444 BIC: 1714.012 R2: 0.785 Adjusted R2: 0.749 Adj. alpha (95%): 0.012 Adj. critical t value (95%): 2.522 Summary Statistics For GWR Parameter Estimates --------------------------------------------------------------------------- Variable Mean STD Min Median Max -------------------- ---------- ---------- ---------- ---------- ---------- X0 67.177 63.347 -90.596 58.075 167.398 X1 0.251 0.211 -0.103 0.198 0.866 X2 -0.717 0.584 -1.660 -0.644 0.966 X3 0.515 0.897 -1.357 0.552 2.427 X4 -0.388 0.672 -1.480 -0.348 1.153 X5 1.115 1.807 -4.181 1.848 3.444 X6 -0.001 0.031 -0.040 -0.014 0.104 ===========================================================================
#Prepare GWR results for mapping
#Add the local parameter estimates (one value per tract) to the GeoDataframe
SF_3857['gwr_intercept'] = gwr_results.params[:,0]
# NOTE(review): 'gwe_diesel' looks like a typo for 'gwr_diesel', but the same
# name is referenced again in the mapping cell below, so it is left unchanged.
SF_3857['gwe_diesel'] = gwr_results.params[:,1]
SF_3857['gwr_white'] = gwr_results.params[:,2]
SF_3857['gwr_african'] = gwr_results.params[:,3]
SF_3857['gwr_asian'] = gwr_results.params[:,4]
SF_3857['gwr_bikerack_count'] = gwr_results.params[:,5]
#Obtain t-values filtered by the multiple-testing correction; entries of 0
#mark locally non-significant estimates (greyed out on the maps below)
gwr_filtered_t = gwr_results.filter_tvals()
# Plot the six GWR coefficient surfaces (intercept + five covariates).
# Tracts whose corrected t-value is 0 — i.e. not significant after the
# multiple-testing correction — are over-painted in light grey.
# The six copy-pasted plot stanzas of the original are collapsed into one loop.
fig, (axes1, axes2) = plt.subplots(nrows=2, ncols=3, figsize=(12,6), constrained_layout=True)
surfaces = [
    ('gwr_intercept', 'GWR Intercept Surface (BW: '),
    ('gwe_diesel', 'GWR diesel Surface (BW: '),
    ('gwr_white', 'GWR white Surface (BW: '),
    ('gwr_african', 'GWR african Surface (BW: '),
    ('gwr_asian', 'GWR asian Surface (BW: '),
    ('gwr_bikerack_count', 'GWR bike rack count (BW: '),
]
all_axes = list(axes1) + list(axes2)
for idx, (ax, (col, title_prefix)) in enumerate(zip(all_axes, surfaces)):
    ax.set_title(title_prefix + str(gwr_bw) + ')', fontsize=10)
    # coefficient surface with a diverging colormap centred on the data range
    SF_3857.plot(col, legend=True, cmap='seismic', ax=ax,
                 edgecolor='black', alpha=.65, legend_kwds={'shrink': .9})
    # grey out locally non-significant tracts for this covariate
    if (gwr_filtered_t[:, idx] == 0).any():
        SF_3857[gwr_filtered_t[:, idx] == 0].plot(color='lightgrey', ax=ax,
                                                  edgecolor='black')
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
# Local model fit: map the per-tract GWR local R2
SF_3857['R2'] = gwr_results.localR2
SF_3857.plot('R2', legend=True, figsize=(5, 5), legend_kwds={'shrink': .8})
ax = plt.gca()
for axis in (ax.get_xaxis(), ax.get_yaxis()):
    axis.set_visible(False)
#plt.savefig('local_R2')
plt.show()
# Local multicollinearity diagnostics. local_collinearity() refits the
# diagnostics at every location and is expensive, so call it ONCE and unpack
# the VIF (index 1) and VDP (index 3) arrays — the original called it twice.
_collin = gwr_results.local_collinearity()
VIF = _collin[1]
VDP = _collin[3]
names = ['Diesel','White','African','Asian','Bike','interaction']
# one VIF map per covariate
fig, ax = plt.subplots(1, 6, figsize = (20, 5))
for col in range(6):
    SF_3857['vif'] = VIF[:, col]
    SF_3857.plot('vif', ax = ax[col], legend = True, legend_kwds={'shrink': .5})
    ax[col].set_title('VIF: ' + names[col])
    ax[col].get_xaxis().set_visible(False)
    ax[col].get_yaxis().set_visible(False)
# one VDP map per covariate
fig, ax = plt.subplots(1, 6, figsize = (20, 5))
for col in range(6):
    SF_3857['vdp'] = VDP[:, col]
    SF_3857.plot('vdp', ax = ax[col], legend = True, legend_kwds={'shrink': .5})
    ax[col].set_title('VDP: ' + names[col])
    ax[col].get_xaxis().set_visible(False)
    ax[col].get_yaxis().set_visible(False)
# Display bike rack data using a pydeck GridLayer (aggregates the rack points
# into extruded 3-D grid cells)
bike_rack = pdk.Layer(
    'GridLayer',
    df6,
    get_position=['lon', 'lat'],
    auto_highlight=True,
    elevation_scale=150,
    pickable=True,
    elevation_range=[0, 30],
    extruded=True,
    opacity=.3,
    coverage=.7)
# camera: centred on downtown SF, tilted and rotated to show the extrusion
view_state = pdk.ViewState(
    longitude=-122.415,
    latitude=37.8023,
    zoom=9,
    min_zoom=1,
    max_zoom=15,
    pitch=50,
    bearing=30)
# render the deck to a standalone HTML file
r = pdk.Deck(layers=[bike_rack], initial_view_state=view_state)
r.to_html('Grid.html')
# Display bike sharing station data using a pydeck HeatmapLayer
# camera centred over San Francisco
view = pdk.ViewState(
    longitude=-122.455,
    latitude=37.7423,
    zoom=11,
    min_zoom=1,
    max_zoom=15)
# four-step colour ramp, light to dark
COLOR_SCALE = [
    [90, 249, 232],
    [70, 204, 197],
    [55, 150, 150],
    [20, 100, 100],
]
# each station is weighted by its dock_count, so larger stations glow hotter
bike_sharing = pdk.Layer(
    "HeatmapLayer",
    data=gdf7,
    opacity=0.9,
    get_position=["station_longitude", "station_latitude"],
    aggregation='"MEAN"',
    color_range=COLOR_SCALE,
    threshold=1,
    get_weight="dock_count",
    pickable=True,
)
# render the deck to a standalone HTML file
r = pdk.Deck(
    layers=bike_sharing,
    initial_view_state=view,
    tooltip={"text": "Concentration of bike-sharing decks in orange"},
)
r.to_html("heatmap_layer.html")
# Create an osmnx graph (G_bike) of the bikeable street network inside an SF
# bounding box (arguments: north, south, east, west) and draw it on a dark background
G_bike = ox.graph.graph_from_bbox(37.86,37.7,-122.325,-122.55,network_type='bike')
ox.plot.plot_graph(G_bike,ax=None, figsize=(8, 8), bgcolor='#111111',
                node_color='w', node_size=.5, node_alpha=None, node_edgecolor='none', node_zorder=1,
                edge_color='#999999', edge_linewidth=.1, edge_alpha=None, bbox=None)
(<Figure size 576x576 with 1 Axes>, <AxesSubplot:>)
# Read the origin and destination addresses interactively.
# Example used in this run: from "Hunter's Point, SF" to "Taiwan Restaurant, SF"
origin_input = input()
destination_input = input()
Hunter's Point, SF Taiwan Restaurant, SF
# Geocode the free-text addresses into (lat, lon) tuples
origin = ox.geocoder.geocode(origin_input)
destination = ox.geocoder.geocode(destination_input)
# showing origin's latlon
origin
(37.7267715, -122.3715718)
# showing destination's latlon
destination
(37.7828379, -122.464162)
# Straight-line distance between the OD pair.
# NOTE(review): inputs are lat/lon in degrees, so this Euclidean value is in
# degrees, not metres (0.108 "units" here). ox.distance.great_circle_vec would
# return metres — confirm which was intended.
ox.distance.euclidean_dist_vec(origin[0], origin[1], destination[0], destination[1])
0.10824225766769899
# Find the network edge nearest to the origin and destination points;
# each result is an edge identifier tuple (u, v, key) of node IDs
ii_bike=ox.distance.get_nearest_edge(G_bike, origin)
jj_bike=ox.distance.get_nearest_edge(G_bike, destination)
# showing the origin's nearest edge (u, v, key)
ii_bike
(65320592, 65322415, 0)
# showing the destination's nearest edge (u, v, key)
jj_bike
(65287668, 65302772, 0)
# Shortest path (minimizing edge length) between the first node (u) of each
# nearest edge; the result is a list of node IDs along the route
path_bike = ox.distance.shortest_path(G_bike, ii_bike[0], jj_bike[0], weight='length')
path_bike[:10]
[65320592, 65320588, 65320580, 65340028, 65366369, 65355023, 6923676856, 6923676854, 65357496, 5443154347]
# the number of nodes along the shortest path
len(path_bike)
193
# Attribute dicts for every edge along the route (street name, highway class,
# length in metres, geometry, ...)
edges = ox.utils_graph.get_route_edge_attributes(G_bike, path_bike, attribute=None, minimize_key='length', retrieve_default=None)
edges[:2]
[{'osmid': 8917551,
'name': 'Navy Road',
'highway': 'residential',
'oneway': False,
'length': 66.526,
'geometry': <shapely.geometry.linestring.LineString at 0x1a68df0110>},
{'osmid': 8917551,
'name': 'Navy Road',
'highway': 'residential',
'oneway': False,
'length': 23.156,
'geometry': <shapely.geometry.linestring.LineString at 0x1a68df0610>}]
# Map the shortest route and diesel PM emission onto a folium map
diesel_map = folium.Map([37.7556, -122.4399], zoom_start = 12)
tracts = SF.set_index('TRACTCE')
# Re-project with the modern authority-string syntax; the {'init': ...} dict
# form is deprecated by pyproj and was emitting a FutureWarning here.
SF_wgs84 = SF.to_crs("EPSG:4326")
folium.Choropleth(
    # use the WGS84 re-projection prepared above (folium expects lon/lat);
    # the original built SF_wgs84 but then never used it
    geo_data=SF_wgs84,
    name='choropleth',
    data=SF_wgs84,
    columns=['TRACTCE','diesel'],
    key_on = 'feature.properties.{}'.format('TRACTCE'),
    fill_color='YlOrBr',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Diesel Emission'
).add_to(diesel_map)
# draw the cycling route on top of the choropleth
route_diesel_map=ox.folium.plot_route_folium(G_bike, path_bike, route_map=diesel_map, fit_bounds=False,
                popup_attribute=None, route_color='#5e8277', route_width=5, route_opacity=.9)
route_diesel_map
/opt/anaconda3/lib/python3.7/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6
return _prepare_from_string(" ".join(pjargs))
# Resolve each node ID on the route back to its lat/lon record via the OSM
# API (one HTTP round-trip per node, so this is slow for long routes).
api = osm.OsmApi()
# iterate the node IDs directly instead of indexing via range(len(...))
node_lst = [api.NodeGet(node_id) for node_id in path_bike]
node_lst[:2]
[{'id': 65320592,
'visible': True,
'version': 2,
'changeset': 2102778,
'timestamp': datetime.datetime(2009, 8, 10, 21, 59, 29),
'user': 'woodpeck_fixbot',
'uid': 147510,
'lat': 37.727563,
'lon': -122.371273,
'tag': {}},
{'id': 65320588,
'visible': True,
'version': 2,
'changeset': 2102778,
'timestamp': datetime.datetime(2009, 8, 10, 21, 59, 29),
'user': 'woodpeck_fixbot',
'uid': 147510,
'lat': 37.727748,
'lon': -122.371954,
'tag': {}}]
# Extract [lon, lat] pairs from the fetched node records; a comprehension
# over the records replaces the index-based loop of the original
node_latlon = [[node["lon"], node["lat"]] for node in node_lst]
type(node_latlon)
list
# check the data format is correct as expected: [lon, lat]
node_latlon[0]
[-122.371273, 37.727563]
# Create a dataframe d with 'coordinates' and 'timestamps' columns; the pydeck
# TripsLayer expects one row per trip, each cell holding a list.
d = pd.DataFrame(columns=['coordinates','timestamps'])
# preview node_latlon converted to a pd.Series
pd.Series(node_latlon)
0 [-122.371273, 37.727563]
1 [-122.371954, 37.727748]
2 [-122.3722079, 37.7277957]
3 [-122.3724341, 37.7282512]
4 [-122.3725856, 37.7283456]
...
188 [-122.4604941, 37.7812627]
189 [-122.4610901, 37.7812355]
190 [-122.4612247, 37.7830976]
191 [-122.4622971, 37.7830489]
192 [-122.4633696, 37.7830001]
Length: 193, dtype: object
# Seed a first (placeholder) row, then store the whole coordinate list in that
# row's 'coordinates' cell. Use .at for the scalar cell write: the original
# chained indexing (d['coordinates'][0] = ...) triggers pandas'
# SettingWithCopyWarning and can silently write to a copy.
d.loc[0]=1
d.at[0, 'coordinates'] = node_latlon
d['coordinates']
0 [[-122.371273, 37.727563], [-122.371954, 37.72... Name: coordinates, dtype: object
# Synthetic timestamps: assume each travel step takes 1 second, so the
# timestamps are simply 0, 1, 2, ... (a real travel-time estimate was not
# available). range() replaces the manual append loop, and .at replaces the
# chained indexing (d['timestamps'][0] = ...) that raises SettingWithCopyWarning.
time_lst = list(range(len(node_latlon)))
d.at[0, 'timestamps'] = time_lst
d
| coordinates | timestamps | |
|---|---|---|
| 0 | [[-122.371273, 37.727563], [-122.371954, 37.72... | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,... |
# Map the shortest route as an animated pydeck TripsLayer, combined with the
# earlier bike-rack grid and bike-sharing heatmap layers
layer = pdk.Layer(
    "TripsLayer",
    d,
    get_path="coordinates",
    get_timestamps="timestamps",
    get_color=[200, 50, 150],
    opacity=1,
    width_min_pixels=10,
    rounded=False,
    trail_length=500,
    current_time=300,
)
# camera centred on San Francisco
view_state = pdk.ViewState(latitude=37.7749295, longitude=-122.4194155, zoom=10, bearing=50, pitch=45)
# reuses the bike_rack and bike_sharing layers defined earlier in the notebook
r = pdk.Deck(layers=[layer,bike_rack,bike_sharing], initial_view_state=view_state)
# route-only variant, kept for reference:
# r = pdk.Deck(layers=layer, initial_view_state=view_state)
r.to_html("trips_layer.html")
Thanks!